{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4f0f77e-761c-4eb5-9c55-6eb8ea42dbf2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!pip install -q SPARQLWrapper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19a8fe66-f1f8-4505-a541-e70531372762",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!pip install -e /dss/dsshome1/04/di93zer/git/cellnet --no-deps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "efec7d77-fd77-4224-becf-6b51dba6adc4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from os.path import join\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b02397a-ad12-4797-9e4f-58e3243f5856",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "DATA_PATH = '/mnt/dssmcmlfs01/merlin_cxg_2023_05_15_sf-log1p'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "39bc8541-9f3c-4567-b8a3-93ce7dfb8675",
   "metadata": {},
   "source": [
    "# Compute lookup matrices "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1aeafb86-ead1-4b93-aa3f-80dfb880b663",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "cell_type_mapping = pd.read_parquet(join(DATA_PATH, 'categorical_lookup/cell_type.parquet'))\n",
    "\n",
    "inverse_mapping = (\n",
    "    cell_type_mapping\n",
    "    .assign(idx=range(len(cell_type_mapping)))\n",
    "    .set_index('label', drop=True)\n",
    ")\n",
    "inverse_mapping.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bdab6fee-d5dd-4218-8700-12c04942c3cd",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from cellnet.utils.cell_ontology import retrieve_child_nodes_from_ubergraph\n",
    "\n",
    "\n",
    "celltypes = cell_type_mapping.label.tolist()\n",
    "child_nodes_dict = {}\n",
    "for k, v in retrieve_child_nodes_from_ubergraph(celltypes).items():\n",
    "    child_nodes_dict[k] = [elem for elem in v if elem in celltypes]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "667fd053-155c-40e9-9b54-acf0b0bbeb22",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "children_idx = []\n",
    "\n",
    "for cell_type in cell_type_mapping.label:\n",
    "    child_nodes = child_nodes_dict[cell_type]\n",
    "    children_idx.append(inverse_mapping.loc[child_nodes].idx.sort_values().tolist())\n",
    "\n",
    "cell_type_mapping['children'] = children_idx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a05dc6e8-7efb-4f52-903d-e9a0ce9647a0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "os.makedirs(join(DATA_PATH, 'cell_type_hierarchy'), exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46a01942-506b-4a34-ae11-d249f1fcf2ab",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "child_matrix = np.eye(len(cell_type_mapping))\n",
    "\n",
    "for i, child_nodes in enumerate(cell_type_mapping.children):\n",
    "    child_matrix[i, child_nodes] = 1.\n",
    "    \n",
    "with open(join(DATA_PATH, 'cell_type_hierarchy/child_matrix.npy'), 'wb') as f:\n",
    "    np.save(f, child_matrix)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94d8b5d4-e17a-4f52-9813-540ff2aeae20",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "56c3c7b2-2a4d-482a-9922-6d20a3a287b7",
   "metadata": {},
   "source": [
    "# Sanity check lookup matrices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "edef0da1-0a58-4e1c-b785-7d9af54f9486",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "cell_type_mapping.loc[np.where(child_matrix[0, :] == 1.)[0]].label.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2cf96a2d-c451-4a6d-bf55-3654d9658788",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
